# Read the SSM 2015 SPSS file and strip the value labels so the variables are
# handled as plain vectors by the recoding steps below.
ssm <- haven::zap_labels(haven::read_sav("../data/SSM2015.sav"))

# Occupational score table fetched from GitHub. Its key column `ssm` is renamed
# to `Occ`, the name used for the join on first-job occupation further down.
jsei <- dplyr::rename(
  readr::read_csv("https://raw.githubusercontent.com/ShoFujihara/OccupationalScales/master/SSM_sei_ssi_v1.0.csv"),
  Occ = ssm
)

# Y/Outcome: Respondent health ----

# Codes 9-99 of dq20 are set to missing; every other code is kept and then
# reversed as 6 - value, which flips the direction of the scale.
ssm <- dplyr::mutate(
  ssm,
  SubjHealth = 6 - dplyr::case_match(
    dq20,
    9:99 ~ NA_integer_,
    .default = dq20
  )
)

# A/Category: Parental education ----

# Parental education indicators.
#   FatherHighEdu / MotherHighEdu: 1 for codes 6-7 and 10-13 of q22_a / q22_b,
#     0 for codes 1-5 and 8-9, missing for codes 14-99 (and unlisted codes).
#   ParentHighEdu: 1 if at least one parent is coded 1, 0 if both are coded 0.
#     When exactly one parent is missing, the other parent's value is used;
#     missing only when both parents are missing.
#   ParentHighEdu_alt: same as ParentHighEdu, with missing kept as its own
#     category ("NA") instead of a missing value.
ssm <- ssm |>
  dplyr::mutate(
    FatherHighEdu = dplyr::case_match(
      q22_a,
      c(1:5, 8:9) ~ 0,
      c(6:7, 10:13) ~ 1,
      c(14:99) ~ NA_integer_
    ),
    MotherHighEdu = dplyr::case_match(
      q22_b,
      c(1:5, 8:9) ~ 0,
      c(6:7, 10:13) ~ 1,
      c(14:99) ~ NA_integer_
    ),
    # Missingness must be tested with is.na(): `x == NA` always evaluates to
    # NA, so a branch written that way can never match.
    ParentHighEdu = dplyr::case_when(
      FatherHighEdu == 1 & MotherHighEdu == 1 ~ 1,
      FatherHighEdu == 1 & MotherHighEdu == 0 ~ 1,
      FatherHighEdu == 0 & MotherHighEdu == 1 ~ 1,
      FatherHighEdu == 0 & MotherHighEdu == 0 ~ 0,
      is.na(FatherHighEdu) & !is.na(MotherHighEdu) ~ MotherHighEdu,
      !is.na(FatherHighEdu) & is.na(MotherHighEdu) ~ FatherHighEdu,
      TRUE ~ NA_real_
    ),
    ParentHighEdu_alt = dplyr::if_else(is.na(ParentHighEdu), 2, ParentHighEdu),
    # fct_recode is namespace-qualified like every other forcats call in this
    # script, so it does not depend on forcats being attached.
    ParentHighEdu = forcats::fct_recode(
      factor(ParentHighEdu),
      "Low" = "0",
      "High" = "1"
    ),
    ParentHighEdu_alt = forcats::fct_recode(
      factor(ParentHighEdu_alt),
      "Low" = "0",
      "High" = "1",
      "NA" = "2"
    )
  )

# C/Treatment: Child education ----

# ## Child sex
# 
# ssm <- ssm |> 
#   dplyr::select(id, dq13_1_1, dq13_2_1, dq13_3_1, dq13_4_1) |> 
#   dplyr::rename(Sex_1 = dq13_1_1, 
#                 Sex_2 = dq13_2_1, 
#                 Sex_3 = dq13_3_1, 
#                 Sex_4 = dq13_4_1) |>
#   tidyr::pivot_longer(-id, 
#                       names_to = c("Sex", "BirthOrder"), 
#                       names_sep = "_") |> 
#   tidyr::pivot_wider(names_from = Sex, 
#                      values_from = value) |>
#   dplyr::mutate(ChildSex = dplyr::case_match(Sex,
#                                         1 ~ 0L, 
#                                         2 ~ 1L,
#                                         8:9 ~ NA_integer_)) |>
#   dplyr::select(id, ChildSex, BirthOrder) |>
#   dplyr::mutate(Names = "ChildSex") |> 
#   tidyr::pivot_wider(names_from = c("Names", "BirthOrder"), 
#                      values_from = ChildSex) |> 
#   dplyr::inner_join(ssm, ., by = "id")

## Child age ----

# Build ChildAge_1 ... ChildAge_4 (age in 2015 of the 1st-4th child) from the
# era code (dq13_k_2a) and the year (dq13_k_2b) of each child's birth, then
# attach the four columns to ssm by id.
ssm <- ssm |>
  dplyr::select(
    id,
    Gengo_1 = dq13_1_2a,
    Gengo_2 = dq13_2_2a,
    Gengo_3 = dq13_3_2a,
    Gengo_4 = dq13_4_2a,
    Year_1 = dq13_1_2b,
    Year_2 = dq13_2_2b,
    Year_3 = dq13_3_2b,
    Year_4 = dq13_4_2b
  ) |>
  # One row per respondent x birth order, with columns Gengo and Year.
  tidyr::pivot_longer(
    -id,
    names_to = c("Date", "BirthOrder"),
    names_sep = "_"
  ) |>
  tidyr::pivot_wider(
    names_from = Date,
    values_from = value
  ) |>
  dplyr::mutate(
    # Era code -> offset to add to the year within the era. Era years are
    # 1-based (Showa 1 = 1926, Heisei 1 = 1989), so the offset is the year
    # BEFORE the era starts; adding 1926 / 1989 would shift every birth year
    # one year too late.
    # NOTE(review): assumes code 1 = Showa, code 2 = Heisei and that
    # dq13_k_2b is the year within the era -- confirm against the codebook.
    Gengo = dplyr::case_match(
      Gengo,
      1 ~ 1925,
      2 ~ 1988,
      c(8, 9) ~ NA_integer_
    ),
    # Codes 88-99 of the year item are treated as missing.
    Year = dplyr::case_match(
      Year,
      88:99 ~ NA_integer_,
      .default = Year
    )
  ) |>
  # tidyr::drop_na(Gengo, Year) |>
  dplyr::mutate(ChildBirth = Gengo + Year) |>
  dplyr::mutate(ChildAge = 2015 - ChildBirth) |>
  dplyr::select(id, ChildAge, BirthOrder) |>
  dplyr::mutate(Names = "ChildAge") |>
  tidyr::pivot_wider(
    names_from = c("Names", "BirthOrder"),
    values_from = ChildAge
  ) |>
  # The native pipe has no `.` placeholder: the wide table is the first
  # argument and ssm the second. (A stray `.` here would be passed on as a
  # positional argument instead of standing for the piped value.)
  dplyr::inner_join(ssm, by = "id")

## Child age over 25
# 1 when the k-th child is aged 25 or more, 0 when younger, and missing when
# the child's age is missing.
ssm <- dplyr::mutate(
  ssm,
  Child1_25 = as.numeric(ChildAge_1 >= 25),
  Child2_25 = as.numeric(ChildAge_2 >= 25),
  Child3_25 = as.numeric(ChildAge_3 >= 25),
  Child4_25 = as.numeric(ChildAge_4 >= 25)
)
# |> 
#   tidyr::replace_na(list(Child1_25 = 0, Child2_25 = 0, Child3_25 = 0, Child4_25 = 0))

## Education ----

# Build ChildCollege_1 ... ChildCollege_4 from the education item of the
# 1st-4th child (dq13_k_5) and attach them to ssm by id.
# Codes 0-5 -> 0, codes 6-7 -> 1, codes 8-99 -> missing.
ssm <- ssm |>
  dplyr::select(
    id,
    Education_1 = dq13_1_5,
    Education_2 = dq13_2_5,
    Education_3 = dq13_3_5,
    Education_4 = dq13_4_5
  ) |>
  # One row per respondent x birth order.
  tidyr::pivot_longer(
    -id,
    names_to = c("Edu", "BirthOrder"),
    names_sep = "_",
    values_to = "Education"
  ) |>
  dplyr::select(-Edu) |>
  dplyr::mutate(
    Education = as.integer(Education),
    College = dplyr::case_match(
      Education,
      0:5 ~ 0L,
      6:7 ~ 1L,
      8:99 ~ NA_integer_
    )
  ) |>
  dplyr::select(id, College, BirthOrder) |>
  dplyr::mutate(Names = "ChildCollege") |>
  tidyr::pivot_wider(
    names_from = c("Names", "BirthOrder"),
    values_from = College
  ) |>
  # The native pipe has no `.` placeholder: the wide table is the first
  # argument and ssm the second. (A stray `.` here would be passed on as a
  # positional argument instead of standing for the piped value.)
  dplyr::inner_join(ssm, by = "id")

# Calendar year in which the first child turns 25, derived from the child's
# age in 2015. Years later than 2015 are set to missing.
ssm <- dplyr::mutate(
  ssm,
  Year_child_reach25 = dplyr::if_else(
    2015 - (ChildAge_1 - 25) > 2015,
    NA_integer_,
    2015 - (ChildAge_1 - 25)
  )
)



## The Highest Education of Children over 25 ----
### Education of children aged 25 or older; when there are several such children, the highest attainment is used

# ChildCollege_Max is 1 when at least one of the four children is aged 25 or
# more AND has ChildCollege_k == 1; every other case (including missing age or
# education) is coded 0.
ssm <- ssm |>
  dplyr::mutate(
    ChildCollege_Max = dplyr::case_when(
      ChildAge_1 >= 25 & ChildCollege_1 == 1L ~ 1L,
      ChildAge_2 >= 25 & ChildCollege_2 == 1L ~ 1L,
      ChildAge_3 >= 25 & ChildCollege_3 == 1L ~ 1L,
      ChildAge_4 >= 25 & ChildCollege_4 == 1L ~ 1L,
      TRUE ~ 0L
    )
  )

# , 
#     CollegeMax_Child_BirthOrder = dplyr::case_when(
#       ChildCollege_Max == 1 & ChildCollege_4 == 1 & ChildAge_4 >= 25 ~ 4L,
#       ChildCollege_Max == 1 & ChildCollege_3 == 1 & ChildAge_3 >= 25 ~ 3L,
#       ChildCollege_Max == 1 & ChildCollege_2 == 1 & ChildAge_2 >= 25 ~ 2L,
#       ChildCollege_Max == 1 & ChildCollege_1 == 1 & ChildAge_1 >= 25 ~ 1L, 
#       TRUE ~ 0), 
#     CollegeMax_Child_Sex = dplyr::case_when(
#       CollegeMax_Child_BirthOrder == 1 ~ ChildSex_1, 
#       CollegeMax_Child_BirthOrder == 2 ~ ChildSex_2, 
#       CollegeMax_Child_BirthOrder == 3 ~ ChildSex_3, 
#       CollegeMax_Child_BirthOrder == 4 ~ ChildSex_4, 
#       ChildCollege_Max == 0 ~ ChildSex_1))

# Z_a/Treatment-mediator confounder: Respondent socdem ----

## Sex

ssm <- dplyr::mutate(
  ssm,
  # Sex: q1_1 code 1 -> "Male", code 2 -> "Female"
  Sex = forcats::fct_recode(
    factor(q1_1),
    "Male" = "1",
    "Female" = "2"
  ),
  ## Birth cohort: 2015 minus q1_2_5
  BirthCohort = 2015 - q1_2_5,
  ## Age: q1_2_5 as is
  Age = q1_2_5
)

# R_a/Treatment-induced confounder: Respondent education ----

# Respondent education as a two-level factor: edssm codes 4-9 are
# "Less than university", codes 10-11 are "University or more", and any other
# value is missing.
ssm <- dplyr::mutate(
  ssm,
  Education = forcats::fct_recode(
    factor(
      dplyr::case_match(
        edssm,
        4:9 ~ "0",
        10:11 ~ "1",
        .default = NA_character_
      )
    ),
    "Less than university" = "0",
    "University or more" = "1"
  )
)

## Region

# Collapse the q17_1_15 codes into eight regional blocks. Codes not listed
# below (e.g. 48-51) are left missing because no default is given.
ssm <- dplyr::mutate(
  ssm,
  Place_15 = forcats::fct_recode(
    factor(
      dplyr::case_match(
        q17_1_15,
        1:7 ~ 1L,
        c(8:10, 15, 19:20) ~ 2L,
        11:14 ~ 3L,
        c(16:18, 21:24) ~ 4L,
        25:30 ~ 5L,
        31:39 ~ 6L,
        40:47 ~ 7L,
        52:99 ~ 8L
      )
    ),
    "Hokkaido / Tohoku" = "1",
    "North Kanto / Koshinetsu" = "2",
    "Metropolitan area" = "3",
    "Hokuriku / Tokai" = "4",
    "Kinki" = "5",
    "Chugoku / Shikoku" = "6",
    "Kyushu / Okinawa" = "7",
    "Other" = "8"
  )
)

# R_b/Mediator-outcome confounder: Respondent life course ----

## Occupation at first job (JSEI)

# Look up the occupational score of the first job: copy q8_f into a temporary
# key `Occ`, join the jsei table on it, drop the helper columns, and keep the
# `sei` score under the name Jsei_Fj.
ssm <- ssm |>
  dplyr::mutate(Occ = q8_f) |>
  dplyr::left_join(jsei, by = "Occ") |>
  dplyr::select(-c(ssi, Occ)) |>
  dplyr::rename(Jsei_Fj = sei)

## Employment Status at first job

# Employment status at first job from q8_a: code 2 -> "Standard",
# codes 3-6 -> "NonStandard", codes 1 and 7-8 -> "Self-employed";
# anything else is missing.
ssm <- dplyr::mutate(
  ssm,
  EmpStatus_Fj = forcats::fct_recode(
    factor(
      dplyr::case_match(
        q8_a,
        2 ~ 0,
        3:6 ~ 1,
        c(1, 7:8) ~ 2,
        .default = NA_integer_
      )
    ),
    "Standard" = "0",
    "NonStandard" = "1",
    "Self-employed" = "2"
  )
)

## Unemployment

## (1) Constructing Person-Year Data----
# Names of the 22 spell columns q9_1_c_7 ... q9_22_c_7.
age_dansu <- paste0("q9_", seq_len(22), "_c_7")

# Person-year table: one row per respondent and per value of `num` between 20
# and 60, carrying the spell number (`dansu`) in force at that value.
# NOTE(review): `num` is presumably the age at which each spell starts (it is
# later added to the birth year) -- confirm against the codebook.
py <- ssm |>
  dplyr::select(-dansu) |>
  # The first spell's value lives in q8_h_1; give it the q9_1_c_7 name so it
  # is reshaped together with spells 2-22.
  dplyr::rename(q9_1_c_7 = q8_h_1) |>
  # all_of() makes it explicit that age_dansu is an external character vector
  # of column names (bare external vectors are deprecated in tidyselect).
  tidyr::pivot_longer(
    dplyr::all_of(age_dansu),
    names_to = c("dansu", "suffix"),
    names_prefix = "q9_",
    names_sep = "_c_7",
    values_to = "num"
  ) |>
  dplyr::group_by(id) |>
  dplyr::select(id, num, dansu, Age) |>
  # Add a row for every value 10-81 that is not already present per id.
  tidyr::complete(num = tidyr::full_seq(10:81, 1)) |>
  dplyr::mutate(dansu = as.numeric(dansu)) |>
  # Carry the last observed spell number and Age forward into the added rows.
  tidyr::fill(dansu, .direction = "down") |>
  tidyr::fill(Age, .direction = "down") |>
  # Still grouped by id: keep the first row for each (id, num) pair.
  dplyr::distinct(num, .keep_all = TRUE) |>
  dplyr::filter(num >= 20 & num <= 60) |>
  dplyr::ungroup()

# Names of the 22 spell columns q9_1_b ... q9_22_b.
unemp_dansu_a <- paste0("q9_", seq_len(22), "_b")

# Attach the q9_k_b value of each spell to the person-year table as `unemp_a`,
# matched on respondent id and spell number.
py <- ssm |>
  dplyr::select(-dansu) |>
  # Spell 1 gets a q9_1_b column filled with 0 so that all 22 spells can be
  # reshaped together.
  dplyr::mutate(q9_1_b = 0) |>
  # all_of() makes it explicit that unemp_dansu_a is an external character
  # vector of column names (bare external vectors are deprecated in
  # tidyselect).
  tidyr::pivot_longer(
    dplyr::all_of(unemp_dansu_a),
    names_to = c("dansu", "suffix"),
    names_prefix = "q9_",
    names_sep = "_b",
    values_to = "unemp_a"
  ) |>
  dplyr::select(id, dansu, unemp_a) |>
  dplyr::mutate(dansu = as.numeric(dansu)) |>
  (\(.) dplyr::left_join(py, ., by = c("id", "dansu")))()

# Calendar year of each person-year row: (2015 - Age) plus `num`.
py <- dplyr::mutate(py, year = (2015 - Age) + num)

# Bring in, per respondent, the year in which the first child turns 25.
py <- dplyr::left_join(
  py,
  dplyr::select(ssm, id, Year_child_reach25),
  by = "id"
)

  
# Respondent-level indicator built from the person-year rows.
# A row counts (Unemp = 1) when unemp_a == 2 and the row's year is not later
# than Year_child_reach25. Respondents whose Year_child_reach25 is missing get
# 0 there, so all of their rows fall after it and none can count.
ssm <- py |>
  tidyr::replace_na(list(Year_child_reach25 = 0)) |>
  dplyr::mutate(
    more_than_25 = as.numeric(year - Year_child_reach25 > 0),
    Unemp = as.numeric(unemp_a == 2 & more_than_25 == 0)
  ) |>
  dplyr::group_by(id) |>
  # Maximum over a respondent's rows, i.e. 1 if any row counts.
  dplyr::summarise(Unemp = max(Unemp, na.rm = TRUE)) |>
  dplyr::ungroup() |>
  (\(.) dplyr::left_join(ssm, ., by = "id"))() |>
  dplyr::mutate(
    # Anything other than exactly 1 (e.g. -Inf from an all-missing group)
    # becomes 0 before labelling.
    Unemp = forcats::fct_recode(
      factor(dplyr::if_else(Unemp == 1, 1, 0)),
      "Never-Unemployment" = "0",
      "Ever-Unemployment" = "1"
    )
  )

## Divorce

# Marital disruption before the first child turns 25 (MarStatus).
# NOTE(review): the *_year variables are computed as 2015 - <age variable>.
# If sq2_1 / sq2_2 / q34 record the respondent's age at the event, the event
# year would be the birth year plus that age (as done for the person-year
# data); left unchanged here -- confirm against the codebook.
ssm <- ssm |>
  dplyr::mutate(
    # Code 88 (sq2_1, sq2_2) is treated as missing.
    divorce_age_remarriage = dplyr::if_else(sq2_1 == 88, NA_integer_, sq2_1),
    widow_age_remarriage = dplyr::if_else(sq2_2 == 88, NA_integer_, sq2_2),
    divorce_year_remarriage = 2015 - divorce_age_remarriage,
    widow_year_remarriage = 2015 - widow_age_remarriage,
    # Flags: 1 when the event year precedes Year_child_reach25; 0 otherwise,
    # including when either year is missing.
    is_divorce_beforechild25_remarriage = dplyr::coalesce(
      dplyr::if_else(divorce_year_remarriage < Year_child_reach25, 1, 0),
      0
    ),
    is_widow_beforechild25_remarriage = dplyr::coalesce(
      dplyr::if_else(widow_year_remarriage < Year_child_reach25, 1, 0),
      0
    ),
    # Values of 888 and above in q34 are treated as missing.
    divorcewidow_age = dplyr::if_else(q34 >= 888, NA_integer_, q34),
    divorcewidow_year = 2015 - divorcewidow_age,
    is_divorcewidow_year_beforechild25 = dplyr::coalesce(
      dplyr::if_else(divorcewidow_year < Year_child_reach25, 1, 0),
      0
    )
  ) |>
  dplyr::mutate(
    MarStatus = dplyr::case_when(
      q25 == 1 ~ 0,
      q25 == 2 & q33 == 1 ~ 0,
      q25 == 2 & q33 == 2 &
        (is_divorce_beforechild25_remarriage == 1 |
          is_widow_beforechild25_remarriage == 1) ~ 1,
      q25 == 2 & q33 == 2 &
        (is_divorce_beforechild25_remarriage == 0 &
          is_widow_beforechild25_remarriage == 0) ~ 0,
      q25 %in% c(3, 4) & is_divorcewidow_year_beforechild25 == 1 ~ 1,
      q25 %in% c(3, 4) & is_divorcewidow_year_beforechild25 == 0 ~ 0,
      TRUE ~ NA_integer_
    ),
    MarStatus = forcats::fct_recode(
      factor(MarStatus),
      "Never-DivorceWidow" = "0",
      "Ever-DivorceWidow" = "1"
    )
  )

## Num. of children

# Number of children: dq12 copied under a descriptive name.
ssm <- dplyr::mutate(ssm, ChildNum = dq12)

## Health problem

# Attach two more per-spell items to the person-year table, matched on
# respondent id and spell number:
#   health_a <- q9_k_b_1, health_b <- q9_k_b_1_9   (k = 1, ..., 22)
# Spell 1 gets a column filled with 0 for each item so that all 22 spells can
# be reshaped together. all_of() makes it explicit that health_dansu is an
# external character vector of column names (bare external vectors are
# deprecated in tidyselect).
health_dansu <- paste0("q9_", seq_len(22), "_b_1")

py <- ssm |>
  dplyr::select(-dansu) |>
  dplyr::mutate(q9_1_b_1 = 0) |>
  tidyr::pivot_longer(
    dplyr::all_of(health_dansu),
    names_to = c("dansu", "suffix"),
    names_prefix = "q9_",
    names_sep = "_b_1",
    values_to = "health_a"
  ) |>
  dplyr::select(id, dansu, health_a) |>
  dplyr::mutate(dansu = as.numeric(dansu)) |>
  (\(.) dplyr::left_join(py, ., by = c("id", "dansu")))()

# Same reshaping for the q9_k_b_1_9 columns.
health_dansu <- paste0("q9_", seq_len(22), "_b_1_9")

py <- ssm |>
  dplyr::select(-dansu) |>
  dplyr::mutate(q9_1_b_1_9 = 0) |>
  tidyr::pivot_longer(
    dplyr::all_of(health_dansu),
    names_to = c("dansu", "suffix"),
    names_prefix = "q9_",
    names_sep = "_b_1_9",
    values_to = "health_b"
  ) |>
  dplyr::select(id, dansu, health_b) |>
  dplyr::mutate(dansu = as.numeric(dansu)) |>
  (\(.) dplyr::left_join(py, ., by = c("id", "dansu")))()

# Respondent-level indicator built from the person-year rows.
# A row counts (PastHealthIssue = 1) when health_a == 7 or health_b == 13 and
# the row's year is not later than Year_child_reach25. Respondents whose
# Year_child_reach25 is missing get 0 there, so none of their rows can count.
ssm <- py |>
  tidyr::replace_na(list(Year_child_reach25 = 0)) |>
  dplyr::mutate(
    more_than_25 = as.numeric(year - Year_child_reach25 > 0),
    PastHealthIssue = as.numeric(
      (health_a == 7 | health_b == 13) & more_than_25 == 0
    )
  ) |>
  dplyr::group_by(id) |>
  # Maximum over a respondent's rows, i.e. 1 if any row counts.
  dplyr::summarise(PastHealthIssue = max(PastHealthIssue, na.rm = TRUE)) |>
  dplyr::ungroup() |>
  (\(.) dplyr::left_join(ssm, ., by = "id"))() |>
  dplyr::mutate(
    # Anything other than exactly 1 (e.g. -Inf from an all-missing group)
    # becomes 0 before labelling.
    PastHealthIssue = forcats::fct_recode(
      factor(dplyr::if_else(PastHealthIssue == 1, 1, 0)),
      "Never-HealthIssue" = "0",
      "Ever-HealthIssue" = "1"
    )
  )

